Dependencies Purposes
data.table faster reading of large datasets
tidyverse dplyr, tidyr, and ggplot2
ggmap maps API
maps map API toolkits
mapdata map data
ggrepel manipulation of layers on maps
varhandle unfactor function
gridExtra show 2 maps side by side
options number of digits to display
library(data.table) 
library(tidyverse)  
library(ggmap)      
library(maps)       
library(mapdata)    
library(ggrepel)
library(ggpubr)
library(varhandle)
library(gridExtra)
# Print up to 16 significant digits so coordinates are shown in full.
# NOTE(review): warn = -1 silences every warning for the rest of the session;
# confirm this is intended rather than suppressing specific calls.
options(digits = 16, warn = -1)

# Read the Google Maps API key from the environment instead of embedding it in
# the source, so the credential is never committed with the script.
# GGMAP_GOOGLE_API_KEY is the variable ggmap itself uses when a key is saved
# with register_google(write = TRUE).
google_api_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (!nzchar(google_api_key)) {
  stop(
    "Google Maps API key not found: set the GGMAP_GOOGLE_API_KEY ",
    "environment variable (e.g. in ~/.Renviron) before running this script.",
    call. = FALSE
  )
}
ggmap::register_google(key = google_api_key)
  1. Read the relevant dataset into a data frame
  2. View the data frame
# Load the arrests CSV with data.table's fast reader, then convert the result
# to a plain data frame and open it in the viewer.
data_path <- "~/Arrest-data-from-2010-to-present.csv"
la_arrests <- data_path %>%
  fread() %>%
  as.data.frame()
View(la_arrests)
  1. Select relevant variables in the dataset
  2. Clean and wrangle data (variable names, unfilled values, latitude, longitude, date)
  3. Factor variable $Charge_Group_Description
  4. Drop messy variable $Location
  5. Review the complete dataset
# Build the analysis data frame in a single pipeline:
#   * keep the relevant columns, renaming the multi-word ones so that their
#     names contain no spaces;
#   * pull Latitude and Longitude out of the Location text as doubles;
#   * convert the first 10 characters of Arrest_Date to a Date;
#   * fill empty Charge_Group_Description values with 'Unknown';
#   * turn Charge_Group_Description into a factor whose levels are ordered by
#     ascending frequency;
#   * drop the Location column once the coordinates have been extracted.
df <- la_arrests %>%
  select(
    Arrest_Date = `Arrest Date`,
    Time,
    Age,
    Area_Name = `Area Name`,
    Sex_Code = `Sex Code`,
    Charge_Group_Description = `Charge Group Description`,
    Arrest_Type_Code = `Arrest Type Code`,
    Location
  ) %>%
  mutate(
    Latitude = as.double(str_sub(word(Location, 2, 2), 2, -3)),
    Longitude = as.double(str_sub(word(Location, -1, -1), 2, -3)),
    Arrest_Date = as.Date(str_sub(Arrest_Date, 1, 10)),
    Charge_Group_Description = replace(
      Charge_Group_Description,
      Charge_Group_Description == "",
      "Unknown"
    ),
    Charge_Group_Description = factor(
      Charge_Group_Description,
      levels = names(sort(table(Charge_Group_Description)))
    )
  ) %>%
  select(-Location)

# Inspect the data frame
View(df)
glimpse(df)
## Observations: 1,276,160
## Variables: 9
## $ Arrest_Date              <date> 2019-06-22, 2019-06-22, 2019-06-22, 20…
## $ Time                     <int> 1630, 1010, 400, 302, 1240, 800, 40, 83…
## $ Age                      <int> 44, 8, 31, 23, 28, 13, 31, 40, 20, 14, …
## $ Area_Name                <chr> "Pacific", "West Valley", "N Hollywood"…
## $ Sex_Code                 <chr> "M", "M", "F", "F", "M", "M", "M", "M",…
## $ Charge_Group_Description <fct> Miscellaneous Other Violations, Unknown…
## $ Arrest_Type_Code         <chr> "M", "O", "M", "M", "F", "D", "F", "M",…
## $ Latitude                 <dbl> 33.9920, 34.1687, 34.1649, 34.2692, 33.…
## $ Longitude                <dbl> -118.4201, -118.5579, -118.3965, -118.4…
# Horizontal bar chart of arrests per charge group, labelled with each group's
# share of all arrests.
p_colors <- c("#FC0BF0", "#52FD4D", "#F4B942", "#350113", "#F90359",
              "#320EFC", "#18B4B2")
# Recycle the 7-colour palette to exactly one colour per charge group. A fixed
# repeat count of 4 only lines up when the data holds exactly 28 groups and
# makes the plot fail otherwise.
p_colors <- rep(p_colors,
                length.out = length(unique(df$Charge_Group_Description)))
ggplot(df, aes(x = Charge_Group_Description)) +
  # NOTE(review): this count layer shares the y axis with the proportion layer
  # below; confirm that drawing both is intended.
  geom_bar(color = "black", fill = p_colors) +
  geom_bar(mapping = aes(x = Charge_Group_Description, y = ..prop.., group = 1),
           stat = "count", fill = p_colors) +
  # group = 1 makes the proportion relative to all arrests, not to each bar.
  geom_text(aes(label = paste0(round(..prop.. * 100, 1), "%"), group = 1),
            stat = "count", size = 3.5, hjust = -0.07, color = p_colors,
            fontface = "bold") +
  coord_flip() +
  # The title describes this chart (arrests by charge group); the previous
  # text was the title of the yearly drug-violation plot.
  labs(title = "Los Angeles Arrests By Charge Group\n(January, 2010 - June, 2019)",
       caption = "Source: https://data.lacity.org") +
  theme(axis.text.y = element_text(color = p_colors, size = 12, face = "bold"),
        axis.text.x = element_text(color = "red", size = 12, face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        plot.title = element_text(color = "#52FD4D", face = "bold", hjust = 0.5))

# Count 'Narcotic Drug Laws' arrests for every year in year_span.
# The years are tabulated in a single pass instead of re-filtering the whole
# data frame once per year and growing the result row by row.
year_span <- 2010:2019
# which() drops rows whose charge group is NA, matching filter() semantics.
drug_rows <- which(df$Charge_Group_Description == "Narcotic Drug Laws")
# Fixing the factor levels to year_span keeps years with zero arrests (count 0)
# and ignores any dates outside the span.
drug_years <- factor(format(df$Arrest_Date[drug_rows], "%Y"),
                     levels = year_span)
drug_by_year <- data.frame(
  Year = year_span,
  Counts = as.double(table(drug_years))
)

# Lollipop chart of yearly drug-law arrest counts: a dot per year on top of a
# stem rising from zero, plus a straight-line (lm) trend without a
# confidence band.
drug_by_year %>%
  ggplot(aes(x = Year, y = Counts)) +
  geom_point(size = 6, color = "#52FD4D") +
  geom_segment(
    aes(x = Year, xend = Year, y = 0, yend = Counts),
    color = "#52FD4D"
  ) +
  # One axis tick per year in the span.
  scale_x_continuous(breaks = year_span) +
  geom_smooth(method = "lm", se = FALSE, color = "#FC0BF0") +
  labs(
    title = "Los Angeles Drug Violations By Year\n(January, 2010 - June, 2019)",
    caption = "Source: https://data.lacity.org"
  ) +
  theme(
    axis.text.y = element_text(color = "red", size = 12, face = "bold"),
    axis.text.x = element_text(color = "red", size = 12, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    plot.title = element_text(color = "#52FD4D", face = "bold", hjust = 0.5)
  )

#' Plot point locations and a 2-D density overlay on a Google road map
#'
#' Downloads a Google static road map (zoom 10, scale 2) centred on `center`
#' and draws each row of `dataset` as a point, with filled density contours
#' on top.
#'
#' @param dataset Data frame with `Longitude` and `Latitude` columns.
#' @param center Vector whose first element is the longitude and whose second
#'   element is the latitude of the map centre.
#' @return The ggmap plot with the point and density layers added.
map_plot <- function(dataset, center) {
  # [[ ]] drops any names on `center` so the vector is named exactly lon/lat.
  base_map <- ggmap(get_googlemap(
    center = c(lon = center[[1]], lat = center[[2]]),
    maptype = "roadmap", scale = 2, zoom = 10
  ))
  base_map +
    geom_point(aes(x = Longitude, y = Latitude),
               data = dataset, color = "#F90359", size = 1) +
    # Map fill to the density level as well as alpha: without a fill mapping
    # the gradient scale below has nothing to act on and is silently ignored.
    stat_density2d(data = dataset,
                   aes(x = Longitude, y = Latitude,
                       fill = ..level.., alpha = ..level..),
                   size = 0.01, bins = 8, geom = "polygon") +
    # Legends are suppressed so the map panel keeps its full size;
    # guide = "none" replaces the deprecated guide = FALSE.
    scale_fill_gradient(low = "#18B4B2", high = "#F90359", guide = "none") +
    scale_alpha(range = c(0.5, 0.8), guide = "none")
}
  1. Create a list of data frames, one per year of interest, each containing the arrests whose charge group is ‘Narcotic Drug Laws’
  2. Feed each data frame in the list to the plot function to draw its map
# For every year in year_span, keep the coordinates of that year's
# 'Narcotic Drug Laws' arrests and draw them on a map stamped with the year.
# Both lists are indexed by the year number itself (e.g. drugs[[2010]]).
drugs <- list()
d_plot <- list()
for (year in year_span) {
  drugs[[year]] <- df %>%
    filter(
      Charge_Group_Description == "Narcotic Drug Laws",
      str_sub(Arrest_Date, 1, 4) == as.character(year)
    ) %>%
    select(Latitude, Longitude)

  d_plot[[year]] <- map_plot(drugs[[year]], c(-118.3533783, 34.0274463)) +
    annotate(
      "text",
      x = -118.1, y = 34.35,
      label = year, color = "#FC0BF0", size = 12
    )
}
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx
## Source : https://maps.googleapis.com/maps/api/staticmap?center=34.027446,-118.353378&zoom=10&size=640x640&scale=2&maptype=roadmap&key=xxx